/*//////////////////////////////////////////////////////

The Gender Investment Gap over the Life-Cycle

.do-file to read in simulated data from model and run
regressions on simulated dataset (Table 4 in paper)

* Note: the regressions are run for singles only, so
this file imports the simulated data for singles only

This version: May 2024

//////////////////////////////////////////////////////
*/

	////// housekeeping /////
	
	// start from an empty session and a clean Results window
	clear all
	cls 
	
	// !!!!!!!!!!!!!!!! SPECIFY PATHS HERE !!!!!!!!!!!!!!!!!!!!!!!!!!!
	
	// this path is where results will be stored
	global resultpath ""
	
	// this path is where the .csv files (exported from the model results) are stored
	global simdatapath ""
	
	// fail early with a clear message if a path was left empty; otherwise
	// every file below would be read from / written to the root directory
	if "$resultpath" == "" | "$simdatapath" == "" {
		display as error "set the globals resultpath and simdatapath at the top of this do-file before running it"
		exit 198
	}
	
	// ihstrans and outreg2 are user-written commands used in the regression
	// part of this file; check for them now rather than after all imports
	foreach cmd in ihstrans outreg2 {
		capture which `cmd'
		if _rc {
			display as error "`cmd' is required but not installed (try: ssc install `cmd')"
			exit 199
		}
	}


	///// import simulated data on equity shares, asset holdings and income /////
	
	// All six model exports share one layout and are processed identically:
	//   - columns v1 and v38 and the first row are discarded
	//   - columns v2-v37 hold one individual's path; column index + 28 = age (30-65)
	//   - men get id = row number, women id = row number + 25000
	//   - the long-format result is saved as <variable>_sm / <variable>_sf
	// Differences between the files:
	//   share  : share_path_s{m,f}.csv, couples coded as -9.00e+11, SMP dummy added
	//   wealth : grid_path_{m,f}.csv,   couples coded as -4.50e+15
	//   prod   : chain_prod{m,f}.csv,   no couple code
	// Forward slashes are used in all paths so the file also runs on macOS/Linux
	// (Windows accepts them as well).
	
	foreach kind in share wealth prod {
		foreach sex in m f {
		
			// file name and couple sentinel for this variable
			local sentinel ""
			if "`kind'" == "share" {
				local csvfile  "share_path_s`sex'"
				local sentinel "-9.00e+11"
			}
			else if "`kind'" == "wealth" {
				local csvfile  "grid_path_`sex'"
				local sentinel "-4.50e+15"
			}
			else {
				local csvfile  "chain_prod`sex'"
			}
			
			import delimited "$simdatapath/`csvfile'.csv", clear
			
			drop v1 v38
			drop if _n == 1
			
			// code all observations for couples as missing;
			// float() on both sides makes the comparison independent of
			// whether import delimited stored the column as float or double
			if "`sentinel'" != "" {
				foreach num of numlist 2/37 {
					replace v`num' = . if float(v`num') == float(`sentinel')
				}
			}
			
			// individual identifier (women are offset by 25000 so ids do not
			// collide with men's once the two samples are appended)
			local female = ("`sex'" == "f")
			gen id = _n + 25000*`female'
			
			// one row per individual and age
			reshape long v, i(id) j(age)
			replace age = age + 28
			rename v `kind'
			
			// stock market participation: 1 if positive share, 0 if zero share,
			// missing otherwise
			if "`kind'" == "share" {
				gen SMP = 1 if share > 0 & share != .
				replace SMP = 0 if share == 0
			}
			
			// gender: 0 = men, 1 = women
			gen gender = `female'
			save "$resultpath/`kind'_s`sex'", replace
		}
	}
	

	//////  merge all information together ////
	
	//  single men: equity share, wealth and productivity, one row per id-age
	use "$resultpath/share_sm", clear
	merge 1:1 id age using "$resultpath/wealth_sm", nogenerate
	merge 1:1 id age using "$resultpath/prod_sm", nogenerate
	
	* include information on education (ids above 10250 are coded 1, all others 0)
	gen educcat = (id > 10250)
	
	* construct income with productivity, education & age
	* (exp(-log(2)) is a factor of one half)
	gen income = exp(9.6060+0.1375) * exp(-log(2)) * ///
	    exp(0.0592*age - 0.000728*age^2) * exp(0.4480*educcat) * prod
	
	* flat taxes (27 percent)
	replace income = (1-0.27)*income
	
	* save data
	save "$resultpath/sm.dta", replace
	
	// single women: equity share, wealth and productivity, one row per id-age
	use "$resultpath/share_sf", clear
	merge 1:1 id age using "$resultpath/wealth_sf", nogenerate
	merge 1:1 id age using "$resultpath/prod_sf", nogenerate
	
	* include information on education
	* (women's ids carry an offset of 25000; ids above 11000 + 25000 are coded 1)
	gen educcat = (id > (11000 + 25000))
	
	* construct income with productivity, education & age; relative to the
	* formula for men, the intercept, the linear age term and the education
	* term carry an additional female-specific shift
	* (exp(-log(2)) is a factor of one half)
	gen income = exp(9.6060+0.1375-0.7109) * exp(-log(2)) * ///
	    exp((0.0592+0.0108716)*age - 0.000728*age^2) * ///
	    exp((0.4480+0.11560)*educcat) * prod
	  	
	* flat taxes (27 percent)
	replace income = (1-0.27)*income
	
	* save data
	save "$resultpath/sf.dta", replace
	
	//////  combine data on singles  ////
	
	// stack men (gender = 0) and women (gender = 1)
	use "$resultpath/sm.dta", clear
	append using "$resultpath/sf.dta"
	
	* only keep singles: observations for couples were coded as missing
	* when the equity shares were imported
	drop if share == . 
	
	* save data 
	save "$resultpath/single_simulated.dta", replace
	
	// erase the intermediate datasets
	foreach tmp in sf sm prod_sf prod_sm share_sf share_sm wealth_sf wealth_sm {
		erase "$resultpath/`tmp'.dta"
	}
	
	**********************************************************
	**********************************************************
	********** RUN REGRESSIONS ON SIMULATED DATA *************
	**********************************************************
	**********************************************************

	use "$resultpath/single_simulated.dta", clear
	
	// generate additional variables
	// (gender is 1 for single women and 0 for single men, so singleage is the
	// interaction of the female dummy with age)
	gen singleage = gender*age
	gen agesq     = age^2
	gen agecb     = age^3
	
	// inverse hyperbolic sine for income 
	// (ihstrans is user-written; it creates ihs_income)
	ihstrans income
	rename ihs_income inc_trans
	
	// same naming as in data files
	rename gender single 
	
	rename age ageav
	rename agesq ageavsq
	rename agecb ageavcb 
	
	// rescale the polynomial terms
	replace ageavsq = ageavsq/100
	replace ageavcb = ageavcb/10000
	
	// safe assets: the part of wealth not held in equity
	gen safe = (1-share)* wealth
	
	ihstrans safe	
	rename ihs_safe safe_trans
	
	// same number of observations as in actual data (draw randomly)
	set seed 2903
	
	// random is left at the default storage type so the draw itself is
	// unchanged; the stable option fixes how ties in random are ordered, which
	// otherwise is not governed by set seed
	generate random = runiform()
	sort random, stable
	drop if _n > 10240
	
	**********************************************************
	****************** REGRESSIONS ***************************
	**********************************************************
	
	// Unconditional Equity Share (Table 4, column 1) 	
	// tobit with left-censoring at zero
		
	tobit share single singleage i.educcat inc_trans safe_trans ageav ageavsq ageavcb, ll(0)
	
		// gap between single women and single men evaluated at fixed ages.
		// The ages are hard-coded; the previously computed sample mean age
		// was never used and has been removed.
		// NOTE(review): 47 is presumably the mean age in the empirical sample - confirm.
		foreach evalage of numlist 47 30 65 {
			nlcom _b[single]+ _b[singleage]* `evalage'
		}
		
	// starts the results table (replace)
	outreg2 using "$resultpath/single_model_fin.tex", ///
		ctitle("share - sim") addtext(Year FE, No)  ///
		keep(single ageav ageavsq ageavcb singleage safe_trans i.educcat inc_trans) replace
	
	// Stock Market Participation Rate (Table 4, column 3)
	// linear probability model for the participation dummy
	
	reg SMP single singleage i.educcat inc_trans safe_trans ageav ageavsq ageavcb
	
		// gap between single women and single men evaluated at fixed ages.
		// The ages are hard-coded; the previously computed sample mean age
		// was never used and has been removed.
		foreach evalage of numlist 47 30 65 {
			nlcom _b[single]+ _b[singleage]* `evalage'
		}
		
	// adds a column to the results table (append)
	outreg2 using "$resultpath/single_model_fin.tex", ///
		ctitle("SMP - sim") addtext(Year FE, No)  ///
		keep(single ageav ageavsq ageavcb singleage safe_trans i.educcat inc_trans) append
		
		
	// Conditional equity share (Table 4, column 5)
	// OLS on observations with a strictly positive equity share
		
	reg share single singleage i.educcat inc_trans safe_trans ageav ageavsq ageavcb if share > 0
	
		// gap between single women and single men evaluated at fixed ages.
		// The ages are hard-coded; the previously computed sample mean age
		// was never used and has been removed.
		foreach evalage of numlist 47 30 65 {
			nlcom _b[single]+ _b[singleage]* `evalage'
		}
		
	// adds a column to the results table (append)
	// NOTE(review): the column title is identical to the one used for the
	// unconditional share regression - confirm this is intended
	outreg2 using "$resultpath/single_model_fin.tex", ///
		ctitle("share - sim") addtext(Year FE, No)  ///
		keep(single ageav ageavsq ageavcb singleage safe_trans i.educcat inc_trans) append

		
 

